% Compute GEV MLEs, P-GP MLEs and Nyblom Statistic 
clear all;
small = 1.0e-10;
pinv_tol = 1.0e-05;
big = 1.0e+8;

% Add new directory to path to find functions
addpath('../Matlab_Functions/');

% -- File Directories   
outdir = 'out/';
figdir = 'fig/';
matdir = 'mat/';
cv_dir = '/Users/mwatson/Dropbox/Shared_Folders/TVExtremes/cvs/';

% Identifiers for files 
% -------------------------- City Size -------------------
application_str = 'FirmSize';
data_fname = [matdir 'FirmSize_top100_byYear.mat'];
load(data_fname);
emp_all = emp_largest_normalized;
yr = calvec;
yr_use = [1950 1973 1996 2019]';
yr = calvec;
k_max = 100;
emp_use = NaN(length(yr_use),k_max);
for i = 1:length(yr_use)
    yr_idx = find(yr==yr_use(i));
    emp_use(i,:) = emp_all(yr_idx,1:k_max);
end
Y_data = emp_use;
% -------------------------------------------------------

kvec = [10 20 30 40 50 60 70 80 90 100]';
n_k = length(kvec);

% Matrices for Storing Results
xsi_mle_mat = NaN(n_k,1);
sigma_mle_mat = NaN(n_k,1);
mu_mle_mat = NaN(n_k,1);
se_xsi_mle_mat = NaN(n_k,1);
se_sigma_mle_mat = NaN(n_k,1);
se_mu_mle_mat = NaN(n_k,1);
L_Stat_mat = NaN(n_k,1);
pv_L_Stat_mat = NaN(n_k,1);
adj_L_Stat_mat = NaN(n_k,1);
Nyblom_stat_adj_mat = NaN(n_k,1);
Stat_con_mu_mat = NaN(n_k,1);
Stat_con_xi_mu_mat = NaN(n_k,1);
CI_xsi_mat = NaN(n_k,2);
CI_x90_mat = NaN(n_k,2);

outfname = [outdir application_str '_LikelihoodResults.txt'];
fid = fopen(outfname,'w');
fprintf(fid,'Data: %s\n',data_fname);
fprintf(fid,'T = %3i \n',size(Y_data,1));

% Set options for optimizers
options_fminunc = optimset('Display','off');
options_fminsearch = optimset('Display','off');

for ik = 1:n_k
  k = kvec(ik);
  Y = Y_data(:,1:k);
  T = size(Y,1); 
 
  % Starting Values to use for MLE 
  xsi_start = 0.5;
  sigma_start = std(Y(:));
  mu_start = mean(Y(:));

   % Find MLE from GEV distribution: Order of parameters is xsi, sigma, mu 
   x_start = [xsi_start sigma_start mu_start]; % Starting Values
   f_unc = @(x) minus_llf_gev(x,Y);
   [x_1,fval_1]=fminunc(f_unc,x_start,options_fminunc); 
   [x_2,fval_2] = fminsearch(f_unc,x_start,options_fminsearch);  
   if fval_1 < fval_2
       x_mle = x_1;
       fval = fval_1;
   else
       x_mle = x_2;
       fval = fval_2;
   end
   llf_unconstrained = -fval;
   xsi_mle = x_mle(1); 
   sigma_mle = x_mle(2);
   mu_mle = x_mle(3);
 
   % Compute Score and Hessian
  [score_mat_unc,info_op_unc,info_h_unc] = score_info_GEV_xi_sigma_mu(Y,xsi_mle,sigma_mle,mu_mle);
  score_mat_unc = score_mat_unc - mean(score_mat_unc);    % score_mat_unc should have mean zero ... impose this

  T = size(Y,1);
  info = info_h_unc;
  cov_mle = inv(info)/T;
  se_mle = sqrt(diag(cov_mle));
  se_xsi_mle = se_mle(1);
  se_sigma_mle = se_mle(2);
  se_mu_mle = se_mle(3);
 
   % Compute Nyblom Statistics 
   L_Stat = compute_nyblom(score_mat_unc,info_h_unc);
   pv_L_Stat = pvalue_LStat(L_Stat,length(x_mle));  % Traditional p-value -- regular asymptotics
   % Read in parameters for adjustment factor
   fname = [cv_dir 'cvx_test0_' num2str(k) '_' num2str(T) '.csv'];
   c_parm = readmatrix(fname);
   adj = exp(c_parm(1) + c_parm(2)*xsi_mle + c_parm(3)*xsi_mle^2);
   adj_L_Stat = adj*L_Stat;

   % Joint test of constant parameters and xsi = 1
  % Compute the estimates of mu, sigma under the constraint xsi = 1
  x_start = [1 0];
  xsi_con = 1.0;
  f_con_xsi_eq_1 = @(x) minus_llf_gev_con_xsi(x,Y,xsi_con);
  [x_1,fval_1] = fminunc(f_con_xsi_eq_1,x_start,options_fminunc);
  [x_2,fval_2] = fminsearch(f_con_xsi_eq_1,x_start,options_fminsearch); 
  if fval_1 < fval_2
    x_mle_constained_xsi_eq_1 = x_1;
  else
    x_mle_constained_xsi_eq_1 = x_1;
  end
  sigma_mle_constrained_xsi_eq_1 = x_mle_constained_xsi_eq_1(1);
  mu_mle_constrained_xsi_eq_1 = x_mle_constained_xsi_eq_1(2);
  xsi_mle_constrained_xsi_eq_1 = 1.0;
  % Compute the score under the constraint
  score_mat_con_xsi_eq_1 = score_GEV_xi_sigma_mu(Y,xsi_mle_constrained_xsi_eq_1, sigma_mle_constrained_xsi_eq_1, mu_mle_constrained_xsi_eq_1);
  
  % Final 2 elements should have mean zero ... Impose this 
%   score_mat_con_xsi_eq_1(:,2:3) = score_mat_con_xsi_eq_1(:,2:3) - mean(score_mat_con_xsi_eq_1(:,2:3));   
  % Nyblom Statistic
  L_Stat_con_xsi_eq_1 = compute_nyblom(score_mat_con_xsi_eq_1,info_h_unc);
  % LM Statistic that xsi = 1
  V = info_h_unc;
  Vinv = inv(V);
  SumScore = sum(score_mat_con_xsi_eq_1);
  LM_stat_xsi_eq_1 = SumScore*Vinv*SumScore'/T;
  % Combined Statistic
  TestStat_constant_and_xsi_eq_1 = L_Stat_con_xsi_eq_1+LM_stat_xsi_eq_1;
  % Compute the p-value for the test
  fprintf('k = %3i \n',k);
  fprintf('Computing the p-value for the joint test of constant parameters and xsi = 1 \n');
  tic
  [pv_TestStat_constant_and_xsi_eq_1,pv_frac_not_pd] = pvalue_TestStat_constant_xsi_eq_1_GEV(TestStat_constant_and_xsi_eq_1,k,T);
  toc
 
   % Construct Test that mu = sigma/xsi
   x_start = [xsi_mle sigma_mle];
   f_con_mu = @(x) minus_llf_gev_con_mu(x,Y);
   [x_1,fval_1] = fminunc(f_con_mu,x_start,options_fminunc);
   [x_2,fval_2] = fminsearch(f_con_mu,x_start,options_fminsearch); 
   if fval_1 < fval_2
       x_mle_constained_mu = x_1;
       fval = fval_1;
   else
       x_mle_constained_mu = x_2;
       fval = fval_2;
   end
   llf_constrained_mu = -fval;
   %  Compute adnjustment factor for test statistic
   fname = [cv_dir 'cvx_test3_' num2str(k) '_' num2str(T) '.csv'];
   c_parm = readmatrix(fname);
   adj = exp(c_parm(1) + c_parm(2)*xsi_mle + c_parm(3)*xsi_mle^2);
   llf_dif = llf_unconstrained-llf_constrained_mu;
   Stat_con_mu = adj*llf_dif; 
 
   % Construct Test that mu = sigma/xsi and xsi = 1
   x_start = [sigma_mle];
   f_con_xsi_mu = @(x) minus_llf_gev_con_xsi_mu(x,Y);
   [x_1,fval_1] = fminunc(f_con_xsi_mu,x_start,options_fminunc);
   [x_2,fval_2] = fminsearch(f_con_xsi_mu,x_start,options_fminsearch);
   if fval_1 < fval_2
       x_mle_constained_xsi_mu = x_1;
       fval = fval_1;
   else
       x_mle_constained_xsi_mu = x_2;
       fval = fval_2;
   end
   llf_constrained_xsi_mu = -fval;
   %  Compute adnjustment factor for test statistic
   fname = [cv_dir 'cvx_test4_' num2str(k) '_' num2str(T) '.csv'];
   c_parm = readmatrix(fname);
   adj = exp(c_parm(1) + c_parm(2)*xsi_mle + c_parm(3)*xsi_mle^2);
   llf_dif = llf_unconstrained-llf_constrained_xsi_mu;
   Stat_con_xsi_mu = adj*llf_dif; 
 
 % Construct Confidence Interval for xsi;
 % Form CI for xsi
 % Step 1 -- read in grid of values of xsi and critical values and construct expanded grid
 %   fname = [cv_dir 'cvx_test1_' str_cvs '_' num2str(T) '.csv'];
 fname = [cv_dir 'cvs_test1_' num2str(k) '_' num2str(T) '.csv'];
 tmp = readmatrix(fname);
 xsi_grid_file = tmp(1,:)';
 c_grid_file = tmp(2,:)';
 % Compute Grid of values for xsi for evaluation
 xsi_min = min(xsi_grid_file);
 xsi_max = max(xsi_grid_file);
 n_xsi = 100;
 xsi_grid = linspace(xsi_min,xsi_max,n_xsi);
 critical_value_xsi_grid = interp1(xsi_grid_file,c_grid_file,xsi_grid)';
 xsi_grid = xsi_grid';
 % Step 2 construct log-likelihood value for each value in Xsi Grid
 llf_vec_con_xsi = NaN(n_xsi,1);
 x_start = [sigma_start mu_start]; % Starting Values
 for i = 1:n_xsi
     xi_con = xsi_grid(i);
     f_con_xsi = @(x) minus_llf_gev_con_xsi(x,Y,xi_con);
     [~,fval_1]=fminunc(f_con_xsi,x_start,options_fminunc);
     [~,fval_2]=fminsearch(f_con_xsi,x_start,options_fminsearch); 
     if fval_1 < fval_2
         fval = fval_1;
     else
         fval = fval_2;
     end
     llf_vec_con_xsi(i)=-fval;
 end
 llf_dif = llf_unconstrained - llf_vec_con_xsi;
 ii = llf_dif < critical_value_xsi_grid;
 tmp = xsi_grid(ii==1);
 CI_xsi_lower = min(tmp);
 CI_xsi_upper = max(tmp);
 
 % Construct a confidence interval for x_90 -- GEV 90th quantile
   % Get MLE
   x_90_mle = gevinv(0.9,xsi_mle,sigma_mle,mu_mle);
   % Adjustment factor
   fname = [cv_dir 'cvx_test2_' num2str(k) '_' num2str(T) '.csv'];
   c_parm = readmatrix(fname);
   adj = exp(c_parm(1) + c_parm(2)*xsi_mle + c_parm(3)*xsi_mle^2);
   
   % Step 1: Input a grid of values 
   q_min = 0.5;
   q_max = 20.0;
   n_q = 500;
   q_grid = linspace(q_min,q_max,n_q)';
   % Step 2 construct log-likelihood value for each value in Xsi Grid
   llf_vec_con_q = NaN(n_q,1);
   % x_start = [sigma_start mu_start]; % Starting Values
   x_start = [xsi_mle sigma_mle];
   for i = 1:n_q
      x_90_con = q_grid(i);
      f_con_x_90 = @(x) minus_llf_gev_con_x_90(x,Y,x_90_con);
      [x_1,fval_1]=fminunc(f_con_x_90,x_start,options_fminunc);
      [x_2,fval_2]=fminsearch(f_con_x_90,x_start,options_fminsearch);
      if fval_1 < fval_2
          fval = fval_1;
          x_1 = x_1;
      else
          fval = fval_2;
          x_1 = x_2;
      end
      llf_vec_con_q(i)=-fval;
      x_start = x_1;
   end
   llf_dif = llf_unconstrained - llf_vec_con_q;
   stat = adj*llf_dif;
   ii = stat < 1.0;
   tmp = q_grid(ii==1);
   CI_x90_lower = min(tmp);
   CI_x90_upper = max(tmp);
   % Make sure these are not at bounds 
   if CI_x90_lower == q_min
       CI_x90_lower = NaN;
   end
   if CI_x90_upper == q_max
       CI_x90_upper = NaN;
   end
  
% Save Results to output file

fprintf(fid,'\n\n k = %3i \n',k);
fprintf(fid,'MLEs with Hessian-based SEs (not correct, but nice to see) \n');
fprintf(fid,'xsi_mle = %6.4f (%6.4f) \n',[xsi_mle se_xsi_mle]);
fprintf(fid,'sigma_mle = %6.4f (%6.4f) \n',[sigma_mle se_sigma_mle]);
fprintf(fid,'mu_mle = %6.4f (%6.4f) \n',[mu_mle se_mu_mle]);
fprintf(fid,'MLEs 90th quantile of GEV: %6.2f \n',x_90_mle);
fprintf(fid,'95 percent CI for Xsi: %6.2f to %6.2f \n',[CI_xsi_lower CI_xsi_upper]);
fprintf(fid,'95 percent CI for 90th GEV quantile: %6.2f to %6.2f \n',[CI_x90_lower CI_x90_upper]);
fprintf(fid,'Some Test Statistics (5 percent CVs are 1.0) \n');
fprintf(fid,'   Test that mu = sigma/xsi: %6.2f \n',Stat_con_mu);
fprintf(fid,'   Test that mu = sigma/xsi and xsi = 1: %6.2f \n',Stat_con_xsi_mu);
fprintf(fid,'   Test for constant parameters (Nyblom)  %6.2f \n',adj_L_Stat);
fprintf(fid,'   pvalue for Joint Test for constant parameters (Nyblom) and xsi = 1  %6.2f \n',pv_TestStat_constant_and_xsi_eq_1);

  
end

fclose(fid);




